<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- The system identifier above is the canonical W3C URL (not a relative
     path) so that XML tools without a local catalog can still resolve the
     DTD and the character entities (such as nbsp) used on this page. -->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>crawler</title>
  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  <script type="text/javascript" src="epydoc.js"></script>
</head>

<body bgcolor="white" text="black" link="blue" vlink="#204080"
      alink="#204080">
<!-- ==================== NAVIGATION BAR ==================== -->
<!-- Top navigation bar: links to the Trees, Indices and Help pages,
     followed by the project name. -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">

  <!-- Tree link (module-tree.html) -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link (identifier-index.html) -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link (help.html) -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project name: plain text in a nested one-cell table; this cell
       contains no homepage link -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            >MetaGETA (Metadata Gathering, Extraction and Transformation)</th>
          </tr></table></th>
  </tr>
</table>
<table width="100%" cellpadding="0" cellspacing="0">
  <tr valign="top">
    <!-- Left cell: breadcrumb naming the module shown on this page -->
    <td width="100%">
      <span class="breadcrumbs">
        Module&nbsp;crawler
      </span>
    </td>
    <!-- Right cell: display options -->
    <td>
      <table cellpadding="0" cellspacing="0">
        <!-- hide/show private: the link calls toggle_private(), which is
             not defined in this file (presumably provided by epydoc.js) -->
        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
        <!-- frames / no frames switch; both links sit inside one pair of
             brackets so the text reads "[frames | no frames]" -->
        <tr><td align="right"><span class="options"
            >[<a href="frames.html" target="_top">frames</a
            >&nbsp;|&nbsp;<a href="crawler-pysrc.html"
            target="_top">no&nbsp;frames</a>]</span></td></tr>
      </table>
    </td>
  </tr>
</table>
<h1 class="epydoc">Source Code for <a href="crawler-module.html">Module crawler</a></h1>
<pre class="py-src">
<a name="L1"></a><tt class="py-lineno">  1</tt>  <tt class="py-line"><tt class="py-comment"># -*- coding: latin-1 -*-</tt> </tt>
<a name="L2"></a><tt class="py-lineno">  2</tt>  <tt class="py-line"><tt class="py-comment"># Copyright (c) 2009 Australian Government, Department of Environment, Heritage, Water and the Arts</tt> </tt>
<a name="L3"></a><tt class="py-lineno">  3</tt>  <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L4"></a><tt class="py-lineno">  4</tt>  <tt class="py-line"><tt class="py-comment"># Permission is hereby granted, free of charge, to any person obtaining a copy</tt> </tt>
<a name="L5"></a><tt class="py-lineno">  5</tt>  <tt class="py-line"><tt class="py-comment"># of this software and associated documentation files (the "Software"), to deal</tt> </tt>
<a name="L6"></a><tt class="py-lineno">  6</tt>  <tt class="py-line"><tt class="py-comment"># in the Software without restriction, including without limitation the rights</tt> </tt>
<a name="L7"></a><tt class="py-lineno">  7</tt>  <tt class="py-line"><tt class="py-comment"># to use, copy, modify, merge, publish, distribute, sublicense, and/or sell</tt> </tt>
<a name="L8"></a><tt class="py-lineno">  8</tt>  <tt class="py-line"><tt class="py-comment"># copies of the Software, and to permit persons to whom the Software is</tt> </tt>
<a name="L9"></a><tt class="py-lineno">  9</tt>  <tt class="py-line"><tt class="py-comment"># furnished to do so, subject to the following conditions:</tt> </tt>
<a name="L10"></a><tt class="py-lineno"> 10</tt>  <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L11"></a><tt class="py-lineno"> 11</tt>  <tt class="py-line"><tt class="py-comment"># The above copyright notice and this permission notice shall be included in</tt> </tt>
<a name="L12"></a><tt class="py-lineno"> 12</tt>  <tt class="py-line"><tt class="py-comment"># all copies or substantial portions of the Software.</tt> </tt>
<a name="L13"></a><tt class="py-lineno"> 13</tt>  <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L14"></a><tt class="py-lineno"> 14</tt>  <tt class="py-line"><tt class="py-comment"># THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR</tt> </tt>
<a name="L15"></a><tt class="py-lineno"> 15</tt>  <tt class="py-line"><tt class="py-comment"># IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,</tt> </tt>
<a name="L16"></a><tt class="py-lineno"> 16</tt>  <tt class="py-line"><tt class="py-comment"># FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE</tt> </tt>
<a name="L17"></a><tt class="py-lineno"> 17</tt>  <tt class="py-line"><tt class="py-comment"># AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER</tt> </tt>
<a name="L18"></a><tt class="py-lineno"> 18</tt>  <tt class="py-line"><tt class="py-comment"># LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,</tt> </tt>
<a name="L19"></a><tt class="py-lineno"> 19</tt>  <tt class="py-line"><tt class="py-comment"># OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN</tt> </tt>
<a name="L20"></a><tt class="py-lineno"> 20</tt>  <tt class="py-line"><tt class="py-comment"># THE SOFTWARE.</tt> </tt>
<a name="L21"></a><tt class="py-lineno"> 21</tt>  <tt class="py-line"> </tt>
<a name="L22"></a><tt class="py-lineno"> 22</tt>  <tt class="py-line"><tt class="py-docstring">'''</tt> </tt>
<a name="L23"></a><tt class="py-lineno"> 23</tt>  <tt class="py-line"><tt class="py-docstring">Iterator for metadata crawling.</tt> </tt>
<a name="L24"></a><tt class="py-lineno"> 24</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L25"></a><tt class="py-lineno"> 25</tt>  <tt class="py-line"><tt class="py-docstring">Example:</tt> </tt>
<a name="L26"></a><tt class="py-lineno"> 26</tt>  <tt class="py-line"><tt class="py-docstring">    &gt;&gt;&gt; import crawler</tt> </tt>
<a name="L27"></a><tt class="py-lineno"> 27</tt>  <tt class="py-line"><tt class="py-docstring">    &gt;&gt;&gt; Crawler=crawler.Crawler('Some directory to crawl')</tt> </tt>
<a name="L28"></a><tt class="py-lineno"> 28</tt>  <tt class="py-line"><tt class="py-docstring">    &gt;&gt;&gt; #Loop thru dataset objects returned by Crawler</tt> </tt>
<a name="L29"></a><tt class="py-lineno"> 29</tt>  <tt class="py-line"><tt class="py-docstring">    &gt;&gt;&gt; for dataset in Crawler:</tt> </tt>
<a name="L30"></a><tt class="py-lineno"> 30</tt>  <tt class="py-line"><tt class="py-docstring">    &gt;&gt;&gt;     metadata=dataset.metadata</tt> </tt>
<a name="L31"></a><tt class="py-lineno"> 31</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L32"></a><tt class="py-lineno"> 32</tt>  <tt class="py-line"><tt class="py-docstring">@todo:  </tt> </tt>
<a name="L33"></a><tt class="py-lineno"> 33</tt>  <tt class="py-line"><tt class="py-docstring">    - Make this faster!!! It's verrry slow on large filesystems...</tt> </tt>
<a name="L34"></a><tt class="py-lineno"> 34</tt>  <tt class="py-line"><tt class="py-docstring">    - Explore removing regular expression and searching using fnmatch instead</tt> </tt>
<a name="L35"></a><tt class="py-lineno"> 35</tt>  <tt class="py-line"><tt class="py-docstring">    - Can this be rewritten to yield files as they're found instead of building a complete list of files first? Probably.</tt> </tt>
<a name="L36"></a><tt class="py-lineno"> 36</tt>  <tt class="py-line"><tt class="py-docstring">      However, the multiple loops are there to ensure certain types of files are dealt with first, but perhaps that logic</tt> </tt>
<a name="L37"></a><tt class="py-lineno"> 37</tt>  <tt class="py-line"><tt class="py-docstring">      needs to be handled by the formats library</tt> </tt>
<a name="L38"></a><tt class="py-lineno"> 38</tt>  <tt class="py-line"><tt class="py-docstring">'''</tt> </tt>
<a name="L39"></a><tt class="py-lineno"> 39</tt>  <tt class="py-line"> </tt>
<a name="L40"></a><tt class="py-lineno"> 40</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt id="link-0" class="py-name" targets="Module utilities=utilities-module.html"><a title="utilities" class="py-name" href="#" onclick="return doclink('link-0', 'utilities', 'link-0');">utilities</a></tt> </tt>
<a name="L41"></a><tt class="py-lineno"> 41</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt id="link-1" class="py-name" targets="Package formats=formats-module.html"><a title="formats" class="py-name" href="#" onclick="return doclink('link-1', 'formats', 'link-1');">formats</a></tt> </tt>
<a name="L42"></a><tt class="py-lineno"> 42</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">re</tt> </tt>
<a name="L43"></a><tt class="py-lineno"> 43</tt>  <tt class="py-line"> </tt>
<a name="Crawler"></a><div id="Crawler-def"><a name="L44"></a><tt class="py-lineno"> 44</tt> <a class="py-toggle" href="#" id="Crawler-toggle" onclick="return toggle('Crawler');">-</a><tt class="py-line"><tt class="py-keyword">class</tt> <a class="py-def-name" href="crawler.Crawler-class.html">Crawler</a><tt class="py-op">:</tt> </tt>
</div><div id="Crawler-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="Crawler-expanded"><a name="L45"></a><tt class="py-lineno"> 45</tt>  <tt class="py-line">    <tt class="py-docstring">''' Iterator for metadata crawling'''</tt> </tt>
<a name="Crawler.__init__"></a><div id="Crawler.__init__-def"><a name="L46"></a><tt class="py-lineno"> 46</tt> <a class="py-toggle" href="#" id="Crawler.__init__-toggle" onclick="return toggle('Crawler.__init__');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="crawler.Crawler-class.html#__init__">__init__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt><tt class="py-param">dir</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Crawler.__init__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Crawler.__init__-expanded"><a name="L47"></a><tt class="py-lineno"> 47</tt>  <tt class="py-line">        <tt class="py-docstring">''' Iterator for metadata crawling</tt> </tt>
<a name="L48"></a><tt class="py-lineno"> 48</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L49"></a><tt class="py-lineno"> 49</tt>  <tt class="py-line"><tt class="py-docstring">            @type  dir: C{str}</tt> </tt>
<a name="L50"></a><tt class="py-lineno"> 50</tt>  <tt class="py-line"><tt class="py-docstring">            @param dir: The directory to start the metadata crawl.</tt> </tt>
<a name="L51"></a><tt class="py-lineno"> 51</tt>  <tt class="py-line"><tt class="py-docstring">        '''</tt> </tt>
<a name="L52"></a><tt class="py-lineno"> 52</tt>  <tt class="py-line"> </tt>
<a name="L53"></a><tt class="py-lineno"> 53</tt>  <tt class="py-line">        <tt id="link-2" class="py-name" targets="Variable formats.__default__.format_regex=formats.__default__-module.html#format_regex,Variable formats.ali_hyperion.format_regex=formats.ali_hyperion-module.html#format_regex,Variable formats.alos.format_regex=formats.alos-module.html#format_regex,Variable formats.aster.format_regex=formats.aster-module.html#format_regex,Variable formats.ccrs.format_regex=formats.ccrs-module.html#format_regex,Variable formats.digitalglobe.format_regex=formats.digitalglobe-module.html#format_regex,Variable formats.ecw.format_regex=formats.ecw-module.html#format_regex,Variable formats.ecwp.format_regex=formats.ecwp-module.html#format_regex,Variable formats.envi.format_regex=formats.envi-module.html#format_regex,Variable formats.esribil.format_regex=formats.esribil-module.html#format_regex,Variable formats.esrigrid.format_regex=formats.esrigrid-module.html#format_regex,Variable formats.fast_l7a.format_regex=formats.fast_l7a-module.html#format_regex,Variable formats.format_regex=formats-module.html#format_regex,Variable formats.jp2.format_regex=formats.jp2-module.html#format_regex,Variable formats.netcdf.format_regex=formats.netcdf-module.html#format_regex,Variable formats.nitf.format_regex=formats.nitf-module.html#format_regex,Variable formats.spot_cap.format_regex=formats.spot_cap-module.html#format_regex,Variable formats.spot_dimap.format_regex=formats.spot_dimap-module.html#format_regex"><a title="formats.__default__.format_regex
formats.ali_hyperion.format_regex
formats.alos.format_regex
formats.aster.format_regex
formats.ccrs.format_regex
formats.digitalglobe.format_regex
formats.ecw.format_regex
formats.ecwp.format_regex
formats.envi.format_regex
formats.esribil.format_regex
formats.esrigrid.format_regex
formats.fast_l7a.format_regex
formats.format_regex
formats.jp2.format_regex
formats.netcdf.format_regex
formats.nitf.format_regex
formats.spot_cap.format_regex
formats.spot_dimap.format_regex" class="py-name" href="#" onclick="return doclink('link-2', 'format_regex', 'link-2');">format_regex</a></tt>  <tt class="py-op">=</tt> <tt id="link-3" class="py-name"><a title="formats" class="py-name" href="#" onclick="return doclink('link-3', 'formats', 'link-1');">formats</a></tt><tt class="py-op">.</tt><tt id="link-4" class="py-name"><a title="formats.__default__.format_regex
formats.ali_hyperion.format_regex
formats.alos.format_regex
formats.aster.format_regex
formats.ccrs.format_regex
formats.digitalglobe.format_regex
formats.ecw.format_regex
formats.ecwp.format_regex
formats.envi.format_regex
formats.esribil.format_regex
formats.esrigrid.format_regex
formats.fast_l7a.format_regex
formats.format_regex
formats.jp2.format_regex
formats.netcdf.format_regex
formats.nitf.format_regex
formats.spot_cap.format_regex
formats.spot_dimap.format_regex" class="py-name" href="#" onclick="return doclink('link-4', 'format_regex', 'link-2');">format_regex</a></tt> </tt>
<a name="L54"></a><tt class="py-lineno"> 54</tt>  <tt class="py-line">        <tt class="py-name">dir</tt><tt class="py-op">=</tt><tt id="link-5" class="py-name"><a title="utilities" class="py-name" href="#" onclick="return doclink('link-5', 'utilities', 'link-0');">utilities</a></tt><tt class="py-op">.</tt><tt id="link-6" class="py-name" targets="Function utilities.uncpath()=utilities-module.html#uncpath"><a title="utilities.uncpath" class="py-name" href="#" onclick="return doclink('link-6', 'uncpath', 'link-6');">uncpath</a></tt><tt class="py-op">(</tt><tt id="link-7" class="py-name"><a title="utilities" class="py-name" href="#" onclick="return doclink('link-7', 'utilities', 'link-0');">utilities</a></tt><tt class="py-op">.</tt><tt id="link-8" class="py-name" targets="Function utilities.realpath()=utilities-module.html#realpath"><a title="utilities.realpath" class="py-name" href="#" onclick="return doclink('link-8', 'realpath', 'link-8');">realpath</a></tt><tt class="py-op">(</tt><tt id="link-9" class="py-name"><a title="utilities" class="py-name" href="#" onclick="return doclink('link-9', 'utilities', 'link-0');">utilities</a></tt><tt class="py-op">.</tt><tt id="link-10" class="py-name" targets="Function utilities.normcase()=utilities-module.html#normcase"><a title="utilities.normcase" class="py-name" href="#" onclick="return doclink('link-10', 'normcase', 'link-10');">normcase</a></tt><tt class="py-op">(</tt><tt id="link-11" class="py-name"><a title="utilities" class="py-name" href="#" onclick="return doclink('link-11', 'utilities', 'link-0');">utilities</a></tt><tt class="py-op">.</tt><tt id="link-12" class="py-name" targets="Function utilities.encode()=utilities-module.html#encode"><a title="utilities.encode" class="py-name" href="#" onclick="return doclink('link-12', 'encode', 'link-12');">encode</a></tt><tt class="py-op">(</tt><tt class="py-name">dir</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L55"></a><tt class="py-lineno"> 55</tt>  <tt class="py-line">        <tt class="py-comment">#Build a dict of matching files and regexes then sort according to the priority of the regex formats </tt> </tt>
<a name="L56"></a><tt class="py-lineno"> 56</tt>  <tt class="py-line">        <tt class="py-name">fileformats</tt><tt class="py-op">=</tt><tt class="py-op">{</tt><tt class="py-op">}</tt> </tt>
<a name="L57"></a><tt class="py-lineno"> 57</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">f</tt> <tt class="py-keyword">in</tt> <tt id="link-13" class="py-name"><a title="utilities" class="py-name" href="#" onclick="return doclink('link-13', 'utilities', 'link-0');">utilities</a></tt><tt class="py-op">.</tt><tt id="link-14" class="py-name" targets="Class utilities.rglob=utilities.rglob-class.html"><a title="utilities.rglob" class="py-name" href="#" onclick="return doclink('link-14', 'rglob', 'link-14');">rglob</a></tt><tt class="py-op">(</tt><tt class="py-name">dir</tt><tt class="py-op">,</tt><tt class="py-string">'|'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt id="link-15" class="py-name"><a title="formats.__default__.format_regex
formats.ali_hyperion.format_regex
formats.alos.format_regex
formats.aster.format_regex
formats.ccrs.format_regex
formats.digitalglobe.format_regex
formats.ecw.format_regex
formats.ecwp.format_regex
formats.envi.format_regex
formats.esribil.format_regex
formats.esrigrid.format_regex
formats.fast_l7a.format_regex
formats.format_regex
formats.jp2.format_regex
formats.netcdf.format_regex
formats.nitf.format_regex
formats.spot_cap.format_regex
formats.spot_dimap.format_regex" class="py-name" href="#" onclick="return doclink('link-15', 'format_regex', 'link-2');">format_regex</a></tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">True</tt><tt class="py-op">,</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">IGNORECASE</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L58"></a><tt class="py-lineno"> 58</tt>  <tt class="py-line">            <tt class="py-comment">#Use latin-1 encoding to fix Issue 20</tt> </tt>
<a name="L59"></a><tt class="py-lineno"> 59</tt>  <tt class="py-line">            <tt class="py-name">f</tt><tt class="py-op">=</tt><tt id="link-16" class="py-name"><a title="utilities" class="py-name" href="#" onclick="return doclink('link-16', 'utilities', 'link-0');">utilities</a></tt><tt class="py-op">.</tt><tt id="link-17" class="py-name"><a title="utilities.realpath" class="py-name" href="#" onclick="return doclink('link-17', 'realpath', 'link-8');">realpath</a></tt><tt class="py-op">(</tt><tt id="link-18" class="py-name"><a title="utilities" class="py-name" href="#" onclick="return doclink('link-18', 'utilities', 'link-0');">utilities</a></tt><tt class="py-op">.</tt><tt id="link-19" class="py-name"><a title="utilities.normcase" class="py-name" href="#" onclick="return doclink('link-19', 'normcase', 'link-10');">normcase</a></tt><tt class="py-op">(</tt><tt id="link-20" class="py-name"><a title="utilities" class="py-name" href="#" onclick="return doclink('link-20', 'utilities', 'link-0');">utilities</a></tt><tt class="py-op">.</tt><tt id="link-21" class="py-name"><a title="utilities.encode" class="py-name" href="#" onclick="return doclink('link-21', 'encode', 'link-12');">encode</a></tt><tt class="py-op">(</tt><tt class="py-name">f</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L60"></a><tt class="py-lineno"> 60</tt>  <tt class="py-line">            <tt class="py-keyword">for</tt> <tt id="link-22" class="py-name" targets="Variable formats.r=formats-module.html#r"><a title="formats.r" class="py-name" href="#" onclick="return doclink('link-22', 'r', 'link-22');">r</a></tt> <tt class="py-keyword">in</tt> <tt id="link-23" class="py-name"><a title="formats.__default__.format_regex
formats.ali_hyperion.format_regex
formats.alos.format_regex
formats.aster.format_regex
formats.ccrs.format_regex
formats.digitalglobe.format_regex
formats.ecw.format_regex
formats.ecwp.format_regex
formats.envi.format_regex
formats.esribil.format_regex
formats.esrigrid.format_regex
formats.fast_l7a.format_regex
formats.format_regex
formats.jp2.format_regex
formats.netcdf.format_regex
formats.nitf.format_regex
formats.spot_cap.format_regex
formats.spot_dimap.format_regex" class="py-name" href="#" onclick="return doclink('link-23', 'format_regex', 'link-2');">format_regex</a></tt><tt class="py-op">:</tt> </tt>
<a name="L61"></a><tt class="py-lineno"> 61</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">search</tt><tt class="py-op">(</tt><tt id="link-24" class="py-name"><a title="formats.r" class="py-name" href="#" onclick="return doclink('link-24', 'r', 'link-22');">r</a></tt><tt class="py-op">,</tt><tt class="py-name">f</tt><tt class="py-op">,</tt><tt class="py-name">re</tt><tt class="py-op">.</tt><tt class="py-name">IGNORECASE</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L62"></a><tt class="py-lineno"> 62</tt>  <tt class="py-line">                    <tt class="py-keyword">if</tt> <tt class="py-name">fileformats</tt><tt class="py-op">.</tt><tt class="py-name">has_key</tt><tt class="py-op">(</tt><tt id="link-25" class="py-name"><a title="formats.r" class="py-name" href="#" onclick="return doclink('link-25', 'r', 'link-22');">r</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt><tt class="py-name">fileformats</tt><tt class="py-op">[</tt><tt id="link-26" class="py-name"><a title="formats.r" class="py-name" href="#" onclick="return doclink('link-26', 'r', 'link-22');">r</a></tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">f</tt><tt class="py-op">)</tt> </tt>
<a name="L63"></a><tt class="py-lineno"> 63</tt>  <tt class="py-line">                    <tt class="py-keyword">else</tt><tt class="py-op">:</tt><tt class="py-name">fileformats</tt><tt class="py-op">[</tt><tt id="link-27" class="py-name"><a title="formats.r" class="py-name" href="#" onclick="return doclink('link-27', 'r', 'link-22');">r</a></tt><tt class="py-op">]</tt><tt class="py-op">=</tt><tt class="py-op">[</tt><tt class="py-name">f</tt><tt class="py-op">]</tt> </tt>
<a name="L64"></a><tt class="py-lineno"> 64</tt>  <tt class="py-line">                    <tt class="py-keyword">break</tt> </tt>
<a name="L65"></a><tt class="py-lineno"> 65</tt>  <tt class="py-line">        <tt class="py-name">files</tt><tt class="py-op">=</tt><tt class="py-op">[</tt><tt class="py-op">]</tt> </tt>
<a name="L66"></a><tt class="py-lineno"> 66</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt id="link-28" class="py-name"><a title="formats.r" class="py-name" href="#" onclick="return doclink('link-28', 'r', 'link-22');">r</a></tt> <tt class="py-keyword">in</tt> <tt id="link-29" class="py-name"><a title="formats.__default__.format_regex
formats.ali_hyperion.format_regex
formats.alos.format_regex
formats.aster.format_regex
formats.ccrs.format_regex
formats.digitalglobe.format_regex
formats.ecw.format_regex
formats.ecwp.format_regex
formats.envi.format_regex
formats.esribil.format_regex
formats.esrigrid.format_regex
formats.fast_l7a.format_regex
formats.format_regex
formats.jp2.format_regex
formats.netcdf.format_regex
formats.nitf.format_regex
formats.spot_cap.format_regex
formats.spot_dimap.format_regex" class="py-name" href="#" onclick="return doclink('link-29', 'format_regex', 'link-2');">format_regex</a></tt><tt class="py-op">:</tt> </tt>
<a name="L67"></a><tt class="py-lineno"> 67</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">fileformats</tt><tt class="py-op">.</tt><tt class="py-name">has_key</tt><tt class="py-op">(</tt><tt id="link-30" class="py-name"><a title="formats.r" class="py-name" href="#" onclick="return doclink('link-30', 'r', 'link-22');">r</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt><tt class="py-name">files</tt><tt class="py-op">.</tt><tt class="py-name">extend</tt><tt class="py-op">(</tt><tt class="py-name">fileformats</tt><tt class="py-op">[</tt><tt id="link-31" class="py-name"><a title="formats.r" class="py-name" href="#" onclick="return doclink('link-31', 'r', 'link-22');">r</a></tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L68"></a><tt class="py-lineno"> 68</tt>  <tt class="py-line"> </tt>
<a name="L69"></a><tt class="py-lineno"> 69</tt>  <tt class="py-line">        <tt class="py-comment">#Class vars</tt> </tt>
<a name="L70"></a><tt class="py-lineno"> 70</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">errors</tt><tt class="py-op">=</tt><tt class="py-op">[</tt><tt class="py-op">]</tt> <tt class="py-comment">#A list of files that couldn't be opened. Contains a tuple with file name, error info, debug info</tt> </tt>
<a name="L71"></a><tt class="py-lineno"> 71</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">files</tt><tt class="py-op">=</tt><tt class="py-name">files</tt> </tt>
<a name="L72"></a><tt class="py-lineno"> 72</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">file</tt><tt class="py-op">=</tt><tt class="py-string">''</tt> </tt>
<a name="L73"></a><tt class="py-lineno"> 73</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">filecount</tt><tt class="py-op">=</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">files</tt><tt class="py-op">)</tt> </tt>
</div><a name="L74"></a><tt class="py-lineno"> 74</tt>  <tt class="py-line"> </tt>
<a name="Crawler.__iter__"></a><div id="Crawler.__iter__-def"><a name="L75"></a><tt class="py-lineno"> 75</tt> <a class="py-toggle" href="#" id="Crawler.__iter__-toggle" onclick="return toggle('Crawler.__iter__');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="crawler.Crawler-class.html#__iter__">__iter__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Crawler.__iter__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Crawler.__iter__-expanded"><a name="L76"></a><tt class="py-lineno"> 76</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">self</tt> </tt>
</div><a name="L77"></a><tt class="py-lineno"> 77</tt>  <tt class="py-line"> </tt>
<a name="Crawler.next"></a><div id="Crawler.next-def"><a name="L78"></a><tt class="py-lineno"> 78</tt> <a class="py-toggle" href="#" id="Crawler.next-toggle" onclick="return toggle('Crawler.next');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="crawler.Crawler-class.html#next">next</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Crawler.next-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Crawler.next-expanded"><a name="L79"></a><tt class="py-lineno"> 79</tt>  <tt class="py-line">        <tt class="py-docstring">''' @rtype:  C{Dataset}</tt> </tt>
<a name="L80"></a><tt class="py-lineno"> 80</tt>  <tt class="py-line"><tt class="py-docstring">            @return: Return the next Dataset or raise StopIteration</tt> </tt>
<a name="L81"></a><tt class="py-lineno"> 81</tt>  <tt class="py-line"><tt class="py-docstring">        '''</tt> </tt>
<a name="L82"></a><tt class="py-lineno"> 82</tt>  <tt class="py-line">        <tt class="py-comment">#Have we finished?</tt> </tt>
<a name="L83"></a><tt class="py-lineno"> 83</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">files</tt><tt class="py-op">)</tt> <tt class="py-op">==</tt> <tt class="py-number">0</tt><tt class="py-op">:</tt> </tt>
<a name="L84"></a><tt class="py-lineno"> 84</tt>  <tt class="py-line">            <tt class="py-keyword">raise</tt> <tt class="py-name">StopIteration</tt> </tt>
<a name="L85"></a><tt class="py-lineno"> 85</tt>  <tt class="py-line"> </tt>
<a name="L86"></a><tt class="py-lineno"> 86</tt>  <tt class="py-line">        <tt class="py-comment">#Get the first file</tt> </tt>
<a name="L87"></a><tt class="py-lineno"> 87</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">file</tt><tt class="py-op">=</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">files</tt><tt class="py-op">.</tt><tt id="link-32" class="py-name" targets="Method formats.__dataset__.idict.pop()=formats.__dataset__.idict-class.html#pop"><a title="formats.__dataset__.idict.pop" class="py-name" href="#" onclick="return doclink('link-32', 'pop', 'link-32');">pop</a></tt><tt class="py-op">(</tt><tt class="py-number">0</tt><tt class="py-op">)</tt> </tt>
<a name="L88"></a><tt class="py-lineno"> 88</tt>  <tt class="py-line">        <tt class="py-keyword">try</tt><tt class="py-op">:</tt> </tt>
<a name="L89"></a><tt class="py-lineno"> 89</tt>  <tt class="py-line">            <tt class="py-comment">#Open it</tt> </tt>
<a name="L90"></a><tt class="py-lineno"> 90</tt>  <tt class="py-line">            <tt class="py-name">ds</tt><tt class="py-op">=</tt><tt id="link-33" class="py-name"><a title="formats" class="py-name" href="#" onclick="return doclink('link-33', 'formats', 'link-1');">formats</a></tt><tt class="py-op">.</tt><tt id="link-34" class="py-name" targets="Function formats.Open()=formats-module.html#Open"><a title="formats.Open" class="py-name" href="#" onclick="return doclink('link-34', 'Open', 'link-34');">Open</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">file</tt><tt class="py-op">)</tt> </tt>
<a name="L91"></a><tt class="py-lineno"> 91</tt>  <tt class="py-line"> </tt>
<a name="L92"></a><tt class="py-lineno"> 92</tt>  <tt class="py-line">            <tt class="py-comment">#Remove any files in our filelist that occur in the dataset's filelist and decrement the filecount</tt> </tt>
<a name="L93"></a><tt class="py-lineno"> 93</tt>  <tt class="py-line">            <tt class="py-keyword">for</tt> <tt class="py-name">f</tt> <tt class="py-keyword">in</tt> <tt class="py-name">ds</tt><tt class="py-op">.</tt><tt id="link-35" class="py-name" targets="Variable formats.__dataset__.Dataset.filelist=formats.__dataset__.Dataset-class.html#filelist"><a title="formats.__dataset__.Dataset.filelist" class="py-name" href="#" onclick="return doclink('link-35', 'filelist', 'link-35');">filelist</a></tt><tt class="py-op">:</tt> </tt>
<a name="L94"></a><tt class="py-lineno"> 94</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">f</tt> <tt class="py-keyword">in</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">files</tt><tt class="py-op">:</tt> </tt>
<a name="L95"></a><tt class="py-lineno"> 95</tt>  <tt class="py-line">                    <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">files</tt><tt class="py-op">.</tt><tt class="py-name">remove</tt><tt class="py-op">(</tt><tt class="py-name">f</tt><tt class="py-op">)</tt> </tt>
<a name="L96"></a><tt class="py-lineno"> 96</tt>  <tt class="py-line">                    <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">filecount</tt><tt class="py-op">-=</tt><tt class="py-number">1</tt> </tt>
<a name="L97"></a><tt class="py-lineno"> 97</tt>  <tt class="py-line">            <tt class="py-comment">#Fin!</tt> </tt>
<a name="L98"></a><tt class="py-lineno"> 98</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> <tt class="py-name">ds</tt> </tt>
<a name="L99"></a><tt class="py-lineno"> 99</tt>  <tt class="py-line">        <tt class="py-keyword">except</tt><tt class="py-op">:</tt> </tt>
<a name="L100"></a><tt class="py-lineno">100</tt>  <tt class="py-line">            <tt class="py-comment">#decrement the filecount and append to the errors list</tt> </tt>
<a name="L101"></a><tt class="py-lineno">101</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">filecount</tt><tt class="py-op">-=</tt><tt class="py-number">1</tt> </tt>
<a name="L102"></a><tt class="py-lineno">102</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">errors</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">file</tt><tt class="py-op">,</tt> </tt>
<a name="L103"></a><tt class="py-lineno">103</tt>  <tt class="py-line">                                <tt id="link-36" class="py-name"><a title="utilities" class="py-name" href="#" onclick="return doclink('link-36', 'utilities', 'link-0');">utilities</a></tt><tt class="py-op">.</tt><tt id="link-37" class="py-name" targets="Function utilities.ExceptionInfo()=utilities-module.html#ExceptionInfo"><a title="utilities.ExceptionInfo" class="py-name" href="#" onclick="return doclink('link-37', 'ExceptionInfo', 'link-37');">ExceptionInfo</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> </tt>
<a name="L104"></a><tt class="py-lineno">104</tt>  <tt class="py-line">                                <tt id="link-38" class="py-name"><a title="utilities" class="py-name" href="#" onclick="return doclink('link-38', 'utilities', 'link-0');">utilities</a></tt><tt class="py-op">.</tt><tt id="link-39" class="py-name"><a title="utilities.ExceptionInfo" class="py-name" href="#" onclick="return doclink('link-39', 'ExceptionInfo', 'link-37');">ExceptionInfo</a></tt><tt class="py-op">(</tt><tt class="py-number">10</tt><tt class="py-op">)</tt> </tt>
<a name="L105"></a><tt class="py-lineno">105</tt>  <tt class="py-line">                        <tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L106"></a><tt class="py-lineno">106</tt>  <tt class="py-line"> </tt>
<a name="L107"></a><tt class="py-lineno">107</tt>  <tt class="py-line">            <tt class="py-comment">#Skip to the next file so we don't stop the iteration</tt> </tt>
<a name="L108"></a><tt class="py-lineno">108</tt>  <tt class="py-line">            <tt class="py-comment">#Exceptions here will keep recursing until we find a</tt> </tt>
<a name="L109"></a><tt class="py-lineno">109</tt>  <tt class="py-line">            <tt class="py-comment">#file we can open or run out of files.</tt> </tt>
<a name="L110"></a><tt class="py-lineno">110</tt>  <tt class="py-line">            <tt class="py-keyword">return</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-40" class="py-name" targets="Method crawler.Crawler.next()=crawler.Crawler-class.html#next"><a title="crawler.Crawler.next" class="py-name" href="#" onclick="return doclink('link-40', 'next', 'link-40');">next</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div></div><a name="L111"></a><tt class="py-lineno">111</tt>  <tt class="py-line"> </tt><script type="text/javascript">
<!--
expandto(location.href);
// -->
</script>
</pre>
<br />
<!-- ==================== NAVIGATION BAR ==================== -->
<!-- Bottom navigation bar: links to the Trees, Indices and Help pages,
     followed by the project name. -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">

  <!-- Tree link (module-tree.html) -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link (identifier-index.html) -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link (help.html) -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project name: plain text in a nested one-cell table; this cell
       contains no homepage link -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            >MetaGETA (Metadata Gathering, Extraction and Transformation)</th>
          </tr></table></th>
  </tr>
</table>
<!-- Footer: an empty left cell and a right-aligned link to the epydoc site.
     The table width is a plain percentage ("100%"). -->
<table border="0" cellpadding="0" cellspacing="0" width="100%">
  <tr>
    <td align="left" class="footer">
    &nbsp;
    </td>
    <td align="right" class="footer">
      <a target="mainFrame" href="http://epydoc.sourceforge.net"
        >http://epydoc.sourceforge.net</a>
    </td>
  </tr>
</table>

<script type="text/javascript">
  <!--
  // Private objects are initially displayed (because if
  // javascript is turned off then we want them to be
  // visible); but by default, we want to hide them.  So hide
  // them unless we have a cookie that says to show them.
  // NOTE(review): checkCookie() is not defined in this file;
  // presumably it comes from epydoc.js, which is referenced in
  // the document head. Confirm before changing this call.
  checkCookie();
  // -->
</script>
</body>
</html>
